The possible impact of weather on crimes in Amsterdam

The possible impact of weather on crimes in Amsterdam

import pandas as pd
from pandas import Timedelta
import numpy as np
import plotly.express as px
import seaborn as sns
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from IPython.display import display, Markdown
import plotly.io as pio
pio.renderers.default = "notebook"
import plotly.graph_objects as go
# Load the merged monthly weather/crime table and strip stray whitespace
# from the column headers.
df = pd.read_csv("merged_weather_misdrijven_monthly_v3.csv")
df.columns = df.columns.str.strip()

# TX and TG are stored in tenths of °C; rescale both to °C.
for temp_col in ('TX', 'TG'):
    df[temp_col] = df[temp_col] / 10

# Heading and one-line takeaway rendered above the figure.
for md_text in (
    "### Bicycle Theft vs. Maximum Temperature",
    "_Slight increase in thefts when it's warmer._",
):
    display(Markdown(md_text))

# Scatter plot with a fitted regression line: TX against bicycle thefts.
plt.figure(figsize=(8, 5))
theft_ax = sns.regplot(
    data=df,
    x='TX',
    y='1.2.3 Diefstal van brom-, snor-, fietsen',
    scatter_kws=dict(alpha=0.5),
)
theft_ax.set_xlabel("Maximum Temperature (°C)")
theft_ax.set_ylabel("Number of Bicycle Thefts")
theft_ax.grid(True)
plt.tight_layout()
plt.show()

Bicycle Theft vs. Maximum Temperature

Slight increase in thefts when it’s warmer.

../_images/47cfb89b4535282af81ffbea693c0b2e36ddd69fc0d63eaba124a93283965728.png
# Title and short takeaway shown above the plot
display(Markdown("### Weather vs. Water-Related Influence Reports"))
display(Markdown("_Higher temperature and evaporation coincide with more 'under influence on boat' incidents._"))

# Parse 'year_month' into datetimes so it can be used as the time axis
df['year_month'] = pd.to_datetime(df['year_month'])

fig, ax = plt.subplots(figsize=(12,6))

# All three series share the same y-axis ("Value"), regardless of their units
ax.plot(df['year_month'], df['TX'], label='Max temperature (°C)', color='orange')
ax.plot(df['year_month'], df['EV24'], label='Evaporation (mm)', color='blue')
ax.plot(df['year_month'], df['3.4.2 Onder invloed (water)'], label='Under influence on boat', color='green')

# Axis labels and title
ax.set_xlabel('Month, grouped by year')
ax.set_ylabel('Value')
ax.set_title('Temperature, Evaporation and Water-Influence Reports Over Time')

# Put a major tick on every month, but render no tick labels
ax.xaxis.set_major_locator(mdates.MonthLocator())
ax.xaxis.set_major_formatter(mdates.DateFormatter(''))

# Shade each calendar year (Jan 1 to Dec 31) with a light background band
years = df['year_month'].dt.year.unique()
for year in years:
    start = pd.Timestamp(f'{year}-01-01')
    end = pd.Timestamp(f'{year}-12-31')
    ax.axvspan(start, end, color='lightgrey', alpha=0.2)

# Bracket-like lines and year labels positioned below the lower y-limit.
# NOTE(review): the y-limits are read here, before the brackets are drawn;
# drawing artists below ylim[0] may make the axis rescale afterwards --
# confirm the brackets end up where intended.
ylim = ax.get_ylim()
y_base = ylim[0] - 0.05*(ylim[1]-ylim[0])          # baseline 5% of the y-range below the lower limit
y_hook_top = y_base + 0.02*(ylim[1]-ylim[0])       # top of the bracket ticks

for year in years:
    start = pd.Timestamp(f'{year}-01-01')
    end = pd.Timestamp(f'{year}-12-31')

    # horizontal line (under the year block)
    ax.hlines(y=y_base, xmin=start, xmax=end, colors='black', linewidth=1.5)

    # short vertical ticks at the start and end, forming the bracket
    ax.vlines(x=start, ymin=y_base, ymax=y_hook_top, colors='black', linewidth=1.5)
    ax.vlines(x=end, ymin=y_base, ymax=y_hook_top, colors='black', linewidth=1.5)

    # year label centered between the bracket ends (July 1), slightly below the line
    mid = pd.Timestamp(f'{year}-07-01')
    ax.text(mid, y_base - 0.02*(ylim[1]-ylim[0]), str(year), ha='center', va='top', fontsize=12)

# Grid, legend and layout
ax.grid(True)
ax.legend()
fig.tight_layout()
# Pad the x-range: 20 days before the first month and 300 days after the last
start_lim = df['year_month'].min() - Timedelta(days=20)
end_lim = df['year_month'].max() + Timedelta(days=300)
ax.set_xlim(start_lim, end_lim)
plt.show()

Higher temperature and evaporation coincide with more ‘under influence on boat’ incidents.

../_images/32aacb40bd6d260959d684e1807f2819760d1e5b1fef0fcd4f4cb3e189efdcc4.png
# Regression plot: monthly minimum visibility (VVN) against home burglaries.
plt.figure(figsize=(10, 6))

burglary_ax = sns.regplot(
    x='VVN',
    y='1.1.1 Diefstal/inbraak woning',
    data=df,
    scatter_kws=dict(s=50),          # marker size
    line_kws=dict(color='red'),      # regression line colour
)

# Title, axis labels and grid are applied to the axes regplot drew on.
burglary_ax.set_title('Lower Visibility May Be Linked to More Home Burglaries')
burglary_ax.set_xlabel('Average Monthly Minimum Visibility (VVN scale, higher = better visibility)')
burglary_ax.set_ylabel('Total Burglary Incidents per Month')
burglary_ax.grid(True)

plt.show()
../_images/d3486a2a4c095209f5654251265872a210701963e701c060d77519348137a68a.png
import matplotlib.pyplot as plt
import seaborn as sns

# Weather columns (heatmap rows) and crime columns (heatmap columns).
weer_vars = ['TG', 'RH', 'FG']
misdaad_vars = [
    '1.1.1 Diefstal/inbraak woning',
    '1.3.1 Ongevallen (weg)',
    '1.4.2 Moord, doodslag',
    '2.5.2 Winkeldiefstal',
    '1.2.4 Zakkenrollerij',
    '1.2.3 Diefstal van brom-, snor-, fietsen',
]

# Restrict the frame to the selected weather and crime columns.
df_subset = df[[*weer_vars, *misdaad_vars]]

# Full pairwise correlation matrix, then only the weather-vs-crime part
# (weather variables as rows, crime categories as columns).
corr_matrix = df_subset.corr()
corr_submatrix = corr_matrix.loc[weer_vars, misdaad_vars]

# Annotated heatmap with a diverging colour map centred on zero.
plt.figure(figsize=(12, 6))
heatmap_ax = sns.heatmap(
    corr_submatrix,
    annot=True,
    cmap='coolwarm',
    center=0,
)
heatmap_ax.set_title('Correlation between Weather Variables and Crime Categories')
heatmap_ax.set_xlabel('Crime Categories')
heatmap_ax.set_ylabel('Weather Variables')
plt.xticks(rotation=45, ha='right')
plt.tight_layout()
plt.show()
../_images/d9c18c2ca9c3928da80f781b48d93a20299c580409b7ede2dd1d173d81ce9dca.png
# Season -> month numbers, inverted below into a month -> season lookup.
season_months = {
    'Winter': (1, 2, 12),
    'Spring': (3, 4, 5),
    'Summer': (6, 7, 8),
    'Autumn': (9, 10, 11),
}
season_map = {
    month_no: season_name
    for season_name, month_numbers in season_months.items()
    for month_no in month_numbers
}

# Derive the numeric month from 'year_month', then tag each row with its season.
df['month'] = pd.to_datetime(df['year_month']).dt.month
df['Season'] = df['month'].map(season_map)

# Dataset column -> display name for the crimes shown in the pie charts.
serious_crimes = {
    '1.4.2 Moord, doodslag': 'Murder',
    '1.4.5 Mishandeling': 'Abuse',
    '3.7.4 Cybercrime': 'Cybercrime'
}

# Incident totals per season for all selected crimes at once, with the rows
# in a fixed Winter -> Autumn order.
season_order = ['Winter', 'Spring', 'Summer', 'Autumn']
totals_by_season = (
    df.groupby('Season')[list(serious_crimes)]
    .sum()
    .reindex(season_order)
)
slice_colors = sns.color_palette("pastel")[0:4]

# One pie chart per crime, side by side.
fig, axes = plt.subplots(1, 3, figsize=(18, 6))

for ax, (col, label) in zip(axes, serious_crimes.items()):
    season_totals = totals_by_season[col]
    ax.pie(
        season_totals,
        labels=season_totals.index,
        autopct='%1.1f%%',
        startangle=90,
        colors=slice_colors,
    )
    ax.set_title(f'Seasonal Distribution of {label}')

# Overall figure title
fig.suptitle('Serious Crimes Occur Steadily Across All Seasons', fontsize=16)
plt.tight_layout()
plt.show()
../_images/0fe0b44a472508b6eff91ba51fd8393c95e5d85ee8cfa8a6445c1a5ab667fa38.png
# Display label -> dataset column name for the crime categories used by
# create_slider_plot (keys become the bar labels, values are read from df).
crime_columns = {
    'Pickpocketing': '1.2.4 Zakkenrollerij',
    'Robbery': '1.4.6 Straatroof',
    'Public violence': '1.4.3 Openlijk geweld (persoon)',
    'Under influence on boat': '3.4.2 Onder invloed (water)'
}

def create_slider_plot(df):
    """Build an animated bar chart of crime shares per temperature bin.

    For every temperature step from 5.0 to 26.5 (in 0.5 increments) the rows
    of ``df`` whose ``'TX'`` value lies in ``[temp - 0.5, temp + 0.5)`` are
    selected, and for each column in ``crime_columns`` the share of that
    column's overall total falling in the bin is computed as a percentage.
    Because the bin is 1.0 wide while the step is 0.5, neighbouring bins
    overlap. Each step becomes one Plotly frame, selectable through a slider
    or played via the "Play" button.

    Args:
        df: DataFrame with a ``'TX'`` column and every column named in the
            values of the module-level ``crime_columns`` mapping.

    Returns:
        A ``plotly.graph_objects.Figure`` with one bar trace, one frame per
        temperature step, a slider initially positioned at 20.0 and a play
        button.
    """
    default_temp = 20.0
    # frange already yields values rounded to one decimal.
    temps = list(frange(5.0, 26.5, 0.5))
    active_index = temps.index(default_temp)

    labels = list(crime_columns.keys())
    columns = list(crime_columns.values())
    # Column totals do not depend on the temperature bin; compute them once.
    totals = [df[col].sum() for col in columns]

    # One frame per temperature step.
    frames = []
    for temp in temps:
        lower, upper = temp - 0.5, temp + 0.5
        filtered = df[(df['TX'] >= lower) & (df['TX'] < upper)]
        # Guard against an all-zero column to avoid dividing by zero.
        y = [
            (filtered[col].sum() / total) * 100 if total > 0 else 0
            for col, total in zip(columns, totals)
        ]
        frames.append(go.Frame(
            data=[go.Bar(x=labels, y=y)],
            name=f"{temp}"
        ))

    fig = go.Figure()

    # The initial trace must match the slider's initial position; otherwise
    # the chart shows the first bin's data under the default temperature label.
    fig.add_trace(frames[active_index].data[0])

    # Layout, play button and slider.
    fig.update_layout(
        title="Crime Distribution by Temperature",
        yaxis=dict(range=[0,25], title="Pct of Total Incidents"),
        xaxis_title="Crime Type",
        width=800, height=600,
        updatemenus=[dict(
            type="buttons",
            showactive=False,
            buttons=[dict(label="Play", method="animate",
                          args=[None, {"frame": {"duration": 300, "redraw": True},
                                       "fromcurrent": True, "transition": {"duration": 0}}])]
        )],
        sliders=[dict(
            active=active_index,
            currentvalue={"prefix": "Temp: "},
            pad={"t": 50},
            # Each step animates to the frame whose name equals str(t).
            steps=[dict(label=f"{t}°C", method="animate", args=[[str(t)], {"frame": {"duration": 0}, "mode": "immediate"}])
                   for t in temps]
        )]
    )
    fig.frames = frames
    return fig

def frange(start, stop, step):
    """Yield values from ``start`` to ``stop`` inclusive, ``step`` apart.

    Each value is computed as ``start + i * step`` and rounded to one
    decimal, instead of repeatedly adding ``step`` to a running total, so
    floating-point error does not accumulate and drop the endpoint
    (e.g. ``frange(0, 0.3, 0.1)`` includes 0.3).

    Args:
        start: First value to yield.
        stop: Inclusive upper bound.
        step: Positive distance between consecutive values.

    Yields:
        The values ``start, start + step, ...`` up to and including ``stop``,
        each rounded to one decimal. Nothing is yielded when ``start > stop``.

    Raises:
        ValueError: If ``step`` is not positive (raised on first iteration).
    """
    if step <= 0:
        raise ValueError("step must be positive")
    if start > stop:
        return
    # Small tolerance so a quotient such as 2.9999999999999996 counts as 3.
    count = int((stop - start) / step + 1e-9) + 1
    for i in range(count):
        yield round(start + i * step, 1)

# Build the temperature-slider figure from the loaded data and render it.
fig = create_slider_plot(df)
fig.show()